# Start log file for script -----------------------------------------------
# Begin logging this session to a text file; the matching txtStop() call is
# at the end of the script.
TeachingDemos::txtStart("./log files/figure4_full_prep.txt")

# Load Data ---------------------------------------------------------
# individual data
# The subsetting below reads impdat1920.WeekWeight and impdat20.WeekWeight,
# which are presumably the objects restored by these load() calls (load()
# does not show what it creates) -- verify against the .rdata files.
load("./data/impdat1920.Race.WeekWeight.rdata")
load("./data/impdat20.Race.WeekWeight.rdata")

# Columns carried forward: time identifiers (year, week, date_mdy), the
# model covariates, the regression weight (WeekWeights) and the outcome
# (D_biep.White_Good_all).
vars <- c("year", "week", "date_mdy", "ideo_mod", "ideo_con", "age_cat4", "edu_cat4", "race3", "sex",
          "census_region", "broughtwebsite2", "WeekWeights", "D_biep.White_Good_all")
# NOTE(review): the first object is named "1920" but is treated here as the
# 2019 sample; confirm it holds no 2020 rows, otherwise the rbind() below
# would include them twice.
impdat19_sub <- subset(impdat1920.WeekWeight, select = vars)
impdat20_sub <- subset(impdat20.WeekWeight, select = vars)

# Stack both samples row-wise; this combined frame is the data the model is
# fitted on.
impdat_comb <- rbind(impdat19_sub, impdat20_sub)

# Models ----------------------------------
# Weighted linear regression of D_biep.White_Good_all on a week-by-year
# interaction (week entered as a factor) plus the remaining covariates listed
# in the formula. Observations are weighted by WeekWeights, and rows whose
# date_mdy equals 2019-05-25 or 2020-05-25 are left out of the fit.
# NOTE(review): the dates are compared against character strings, which
# assumes date_mdy is a Date (or an ISO "YYYY-MM-DD" string) -- confirm.
# NOTE(review): the call is echoed verbatim by summary(), so editing it
# changes what is written to the log.
m_week1920 <- lm(D_biep.White_Good_all ~ 
                 as.factor(week)*year + ideo_mod + ideo_con +
                 age_cat4 + edu_cat4 + race3 + sex +
                 census_region + broughtwebsite2,
               data = impdat_comb, weights = WeekWeights,
               subset = date_mdy != "2019-05-25" & date_mdy != "2020-05-25")
# Print coefficient table and fit statistics.
summary(m_week1920)

# Prediction Data Sets ------------------------------------------------
# GF is week 21 [index 22] (a Monday)

# Week labels present in each sample. The two filters are not the same: the
# first sample keeps weeks that have a non-missing weight, the second keeps
# weeks that have a non-missing outcome.
# NOTE(review): confirm this asymmetry is intentional.
has_weight19 <- !is.na(impdat19_sub$WeekWeights)
has_score20 <- !is.na(impdat20_sub$D_biep.White_Good_all)
weeks19 <- names(table(impdat19_sub$week[has_weight19]))
weeks20 <- names(table(impdat20_sub$week[has_score20]))

# Predict only for weeks that occur in both samples (kept in weeks20 order).
in_both <- weeks20 %in% weeks19
weeks_pred <- weeks20[in_both]

# Fixed covariate profile at which every prediction is evaluated; defined
# once and reused by all three grids below.
ref_profile <- list(
  ideo_mod = 0,
  ideo_con = 0,
  race3 = "White",
  age_cat4 = "18-29",
  edu_cat4 = "Post-Grad",
  sex = "f",
  census_region = "South",
  broughtwebsite2 = "Assignment school/work"
)

# Per-year grids: one row per shared week, no year column. Both hold the
# same content.
pdat19 <- do.call(data.frame, c(list(week = weeks_pred), ref_profile))
pdat20 <- pdat19

# combine
# Stacked grid actually passed to predict(): all shared weeks for 2019
# followed by the same weeks for 2020 (week is recycled across both years).
n_weeks <- length(weeks_pred)
pdat <- do.call(
  data.frame,
  c(list(week = weeks_pred,
         year = rep(c(2019, 2020), each = n_weeks)),
    ref_profile)
)

# Save DF for Plot -----------------------------------------------------------------
# Fitted values and their standard errors from m_week1920 for every row of
# pdat (each shared week under 2019, then the same weeks under 2020).
preds <- predict(m_week1920, pdat, se.fit = TRUE)

# One row per prediction. Year and week labels are taken from pdat itself so
# they cannot drift out of step with the rows that were predicted.
week_label <- as.character(pdat$week)
p_dat <- data.frame(year = as.character(pdat$year),
                    week = week_label,
                    pred = preds$fit,
                    se = preds$se.fit)

# Put both years on a common 2020 calendar axis: each week label becomes the
# Monday of that week (%U = week of year counted from Sunday, %u = weekday
# with Monday as 1).
p_dat$week <- as.Date(paste(2020, week_label, 1, sep = "-"), "%Y-%U-%u")

# Week 00 (the days before the first Sunday) contains no Monday in 2020,
# because Jan 1 is a Wednesday, so the parsed value is not usable; pin those
# rows to Jan 1. Rows are located by their week label rather than by fixed
# positions (previously rows 1 and 54), which were only correct when exactly
# 53 shared weeks existed and "00" was the first of them.
is_week00 <- week_label %in% c("0", "00")
p_dat$week[is_week00] <- as.Date("2020-01-01")

# Blank out the 2020 predictions for the weeks starting 2020-11-23 and
# 2020-11-30 so they are not plotted.
# NOTE(review): presumably these weeks are incomplete in the 2020 sample --
# confirm the reason for dropping them.
masked_weeks <- as.Date(c("2020-11-23", "2020-11-30"))
masked_rows <- which(
  p_dat$year == "2020" &
    (p_dat$week == masked_weeks[1] | p_dat$week == masked_weeks[2])
)
p_dat$pred[masked_rows] <- NA
p_dat$se[masked_rows] <- NA

# Copy kept under a separate name for later use.
p_dat_Fsamp <- p_dat


# In-Text Summaries -------------------------------------------------------
# Both effect sizes are differences between predicted values, scaled by the
# standard deviation of the outcome in the 2020 sample.
sd_outcome20 <- sd(impdat20_sub$D_biep.White_Good_all, na.rm = TRUE)

# Predicted value for the week starting on `week_start` in year `yr`
# (numeric(0) when no such row exists in p_dat).
pred_at <- function(week_start, yr) {
  p_dat$pred[which(p_dat$week == week_start & p_dat$year == yr)]
}

# week post to week prior comparison effect size
# (week of 2020-06-01 vs. week of 2020-05-18, both in 2020)
(pred_at("2020-06-01", "2020") - pred_at("2020-05-18", "2020")) / sd_outcome20
# 2020 vs 2019 same week effect size
# (week of 06-01 on the common calendar axis, 2020 minus 2019)
(pred_at("2020-06-01", "2020") - pred_at("2020-06-01", "2019")) / sd_outcome20

# End log file for script -------------------------------------------------
TeachingDemos::txtStop()